Based on assignments by Lisa Zhang and Jimmy Ba.
In this lab, you will build models to perform image colourization. That is, given a greyscale image, we wish to predict the colour at each pixel. Image colourization is a difficult problem for many reasons, one of which being that it is ill-posed: for a single greyscale image, there can be multiple, equally valid colourings.
To keep the training time manageable we will use the CIFAR-10 data set, which consists of images of size 32x32 pixels. For most of the questions we will use a subset of the dataset. The data loading script is included with the notebooks, and should download automatically the first time it is loaded.
We will be starting with a convolutional autoencoder and tweaking it along the way to improve our performance. Then, as a second part of the assignment, we will compare the autoencoder approach to conditional generative adversarial networks (cGANs).
In the process, you are expected to learn to:
Submit an HTML file containing all your code, outputs, and write-up from parts A and B. You can produce a HTML file directly from Google Colab. The Colab instructions are provided at the end of this document.
Do not submit any other files produced by your code.
Include a link to your colab file in your submission.
Please use Google Colab to complete this assignment. If you want to use Jupyter Notebook, please complete the assignment and upload your Jupyter Notebook file to Google Colab for submission.
Include a link to your Colab file here. If you would like the TA to look at your Colab file in case your solutions are cut off, please make sure that your Colab file is publicly accessible at the time of submission.
Colab Link:https://colab.research.google.com/drive/1bvDIoaAUpsdRS0ODWXhL9wmLwVMInlZx#scrollTo=QOGjb0yLTx6d
In this part we will construct and compare different autoencoder models for the image colourization task.
Provided are some helper functions for loading and preparing the data. Note that you will need to use the Colab GPU for this assignment.
"""
Colourization of CIFAR-10 Horses via classification.
"""
import argparse
import math
import time
import os
from glob import glob
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import numpy.random as npr
import scipy.misc
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
######################################################################
# Setup working directory
######################################################################
%mkdir -p /content/a3/
%cd /content/a3
######################################################################
# Helper functions for loading data
######################################################################
# adapted from
# https://github.com/fchollet/keras/blob/master/keras/datasets/cifar10.py
import os
import pickle
import sys
import tarfile
import numpy as np
from PIL import Image
from six.moves.urllib.request import urlretrieve
def get_file(fname, origin, untar=False, extract=False, archive_format="auto", cache_dir="data"):
    """Download a file from `origin` into `cache_dir` unless it is already cached.

    Args:
        fname: file name; with untar=True this is the name of the extracted
            directory and the archive is stored as ``fname + ".tar.gz"``.
        origin: URL to download from.
        untar: if True, treat the download as a .tar.gz archive and extract it.
        extract: if True, pass the file to ``_extract_archive``.
            NOTE(review): ``_extract_archive`` is not defined anywhere in this
            notebook, so extract=True raises NameError -- confirm before use.
        archive_format: forwarded to ``_extract_archive``.
        cache_dir: local directory used as the download cache.

    Returns:
        Path to the downloaded file, or to the extracted directory when
        untar=True.
    """
    # Local import: these names were referenced but never imported in the
    # original, so any download failure raised NameError instead of the
    # intended message.
    from urllib.error import HTTPError, URLError

    datadir = os.path.join(cache_dir)
    if not os.path.exists(datadir):
        os.makedirs(datadir)
    if untar:
        untar_fpath = os.path.join(datadir, fname)
        fpath = untar_fpath + ".tar.gz"
    else:
        fpath = os.path.join(datadir, fname)
    print("File path: %s" % fpath)
    if not os.path.exists(fpath):
        print("Downloading data from", origin)
        error_msg = "URL fetch failure on {}: {} -- {}"
        try:
            try:
                urlretrieve(origin, fpath)
            # Fix: HTTPError is a subclass of URLError, so it must be caught
            # first; the original order always took the URLError branch and
            # then failed on e.errno/e.reason, which HTTPError lacks.
            except HTTPError as e:
                raise Exception(error_msg.format(origin, e.code, e.msg))
            except URLError as e:
                raise Exception(error_msg.format(origin, e.errno, e.reason))
        except (Exception, KeyboardInterrupt):
            # Remove a partial download so a retry starts clean.
            if os.path.exists(fpath):
                os.remove(fpath)
            raise
    if untar:
        if not os.path.exists(untar_fpath):
            print("Extracting file.")
            with tarfile.open(fpath) as archive:
                archive.extractall(datadir)
        return untar_fpath
    if extract:
        _extract_archive(fpath, datadir, archive_format)
    return fpath
def load_batch(fpath, label_key="labels"):
    """Internal utility for parsing CIFAR data.

    # Arguments
        fpath: path to the file to parse.
        label_key: key for label data in the retrieved
            dictionary.

    # Returns
        A tuple `(data, labels)` where `data` has shape (N, 3, 32, 32).
    """
    # Fix: use a context manager so the file handle is closed even if
    # unpickling raises (the original leaked the handle on error).
    with open(fpath, "rb") as f:
        if sys.version_info < (3,):
            d = pickle.load(f)
        else:
            # CIFAR pickles were written by Python 2; "bytes" keeps the raw
            # byte strings intact.
            d = pickle.load(f, encoding="bytes")
    # Keys come back as byte strings; normalise them to str.
    d = {k.decode("utf8"): v for k, v in d.items()}
    data = d["data"]
    labels = d[label_key]
    # Each row is a flat 3072-value image; reshape to channels-first 3x32x32.
    data = data.reshape(data.shape[0], 3, 32, 32)
    return data, labels
def load_cifar10(transpose=False):
    """Loads CIFAR10 dataset.

    # Arguments
        transpose: when True, return images as NHWC instead of NCHW.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    dirname = "cifar-10-batches-py"
    origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz"
    path = get_file(dirname, origin=origin, untar=True)

    num_train_samples = 50000
    batch_len = 10000
    x_train = np.zeros((num_train_samples, 3, 32, 32), dtype="uint8")
    y_train = np.zeros((num_train_samples,), dtype="uint8")

    # Training set is split across five pickle files of 10000 images each.
    for batch_idx in range(5):
        batch_path = os.path.join(path, "data_batch_" + str(batch_idx + 1))
        imgs, lbls = load_batch(batch_path)
        lo = batch_idx * batch_len
        x_train[lo : lo + batch_len, :, :, :] = imgs
        y_train[lo : lo + batch_len] = lbls

    x_test, y_test = load_batch(os.path.join(path, "test_batch"))

    # Labels as column vectors (N, 1).
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if transpose:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)
    return (x_train, y_train), (x_test, y_test)
# Download CIFAR dataset
# m is ((x_train, y_train), (x_test, y_test)); images are uint8, channels-first.
m = load_cifar10()
print("Shape of Train Data:",m[0][0].shape)
print("Shape of Train Labels:",m[0][1].shape)
print("Shape of Test Data:",m[1][0].shape)
print("Shape of Test Labels:",m[1][1].shape)
# Peek at the red channel of the first training image.
print(m[0][0][0,0])
When we load the dataset we get a tuple of train and test data as output, which is evident from the code above. We have 50000 train samples and 10000 test samples of RGB images.
Preprocess the data to select only images of horses. Learning to generate only horse images will make our task easier. Your function will also convert the colour images to greyscale to create our input data.
#try rgb to grey
from skimage import color
from skimage import io
# Work on a single CIFAR-10 category (7 = horse).
HORSE_CATEGORY = 7

# convert colour images into greyscale
def process(xs, ys, max_pixel=256.0, downsize_input=False):
    """
    Pre-process CIFAR10 images: keep only the horse category, shuffle,
    and scale colour values into [0, 1).

    Args:
        xs: the colour RGB pixel values, shape (N, 3, H, W)
        ys: the category labels, shape (N, 1)
        max_pixel: maximum pixel value in the original data
        downsize_input: when True, return blurred colour images (avg-pooled
            then upsampled) instead of greyscale as the model input
    Returns:
        A tuple (colour, model_input) of normalized, shuffled images.
    """
    scaled = xs / max_pixel
    # Keep only rows labelled as horses.
    horses = scaled[np.where(ys == HORSE_CATEGORY)[0], :, :, :]
    npr.shuffle(horses)
    # Greyscale = per-pixel mean over the three colour channels.
    grey = np.mean(horses, axis=1, keepdims=True)

    if not downsize_input:
        return (horses, grey)

    # Blur by two rounds of 2x average pooling followed by 2x upsampling,
    # which keeps the original spatial size.
    blur = nn.Sequential(
        nn.AvgPool2d(2),
        nn.AvgPool2d(2),
        nn.Upsample(scale_factor=2),
        nn.Upsample(scale_factor=2),
    )
    blurred = blur.forward(torch.from_numpy(horses).float())
    return (horses, blurred.data.numpy())
# Build the (rgb, grey) pair for the horse subset of the training split.
input_data = process(m[0][0], m[0][1], max_pixel=256.0, downsize_input=False)
# Notebook cell output: displays the raw training label array.
m[0][1]
Create a dataloader (or function) to batch the samples.
# dataloader for batching samples
def get_batch(x, y, batch_size):
    """
    Generator that yields mini-batches of paired data.

    Args:
        x: input values, shape (N, C, H, W)
        y: output values, shape (N, C, H, W)
        batch_size: size of each batch
    Yields:
        (batch_x, batch_y): aligned slices of at most batch_size samples
        (the final batch may be smaller).
    """
    n_samples = np.shape(x)[0]
    assert n_samples == np.shape(y)[0]
    start = 0
    while start < n_samples:
        stop = start + batch_size
        yield (x[start:stop, :, :, :], y[start:stop, :, :, :])
        start = stop
# NOTE(review): this pairs the horse greyscale inputs with the *full* label
# array m[0][1] (50000 rows), so get_batch's length assert would fail if this
# generator were ever consumed; later cells rebuild the batches correctly
# from (train_grey, train_rgb) -- confirm this cell is vestigial.
train_loader = get_batch(input_data[1], m[0][1], 32)
Verify and visualize that we are able to generate different batches of data.
# code to load different batches of horse dataset
print("Loading data...")
(x_train, y_train), (x_test, y_test) = load_cifar10()
print("Transforming data...")
# process() filters down to the horse category and returns (rgb, grey) pairs.
train_rgb, train_grey = process(x_train, y_train)
test_rgb, test_grey = process(x_test, y_test)
# shape of data and labels before selection
print(x_train.shape, y_train.shape)
# shape of training data
print('Training Data: ', train_rgb.shape, train_grey.shape)
# shape of testing data
print('Testing Data: ', test_rgb.shape, test_grey.shape)
# NOTE(review): the two prints below exactly duplicate the two above.
# shape of training data
print('Training Data: ', train_rgb.shape, train_grey.shape)
# shape of testing data
print('Testing Data: ', test_rgb.shape, test_grey.shape)
Load Batches
# obtain batches of images
# First batch: greyscale inputs and their RGB targets.
xs, ys = next(iter(get_batch(train_grey, train_rgb, 10)))
print(xs.shape, ys.shape)
Visualization
# visualize 5 train/test images
# Greyscale previews: row 1 = first five training images, row 2 = first five
# test images.
plt.figure(figsize=(9, 5))
#train = train_grey.numpy()
#test = test_grey.numpy()
for i,img in enumerate(train_grey):
    if i >= 5: break
    plt.subplot(2, 5, i+1)
    img = img.reshape((32,32))
    plt.imshow(img)
for i,img in enumerate(test_grey):
    if i >= 5: break
    plt.subplot(2, 5, 5+i+1)
    img = img.reshape((32,32))
    plt.imshow(img)
# Colour previews with the same layout; the two swapaxes calls convert
# (C, H, W) into (H, W, C) as expected by imshow.
plt.figure(figsize=(9, 5))
#train = train_grey.numpy()
#test = test_grey.numpy()
for i,img in enumerate(train_rgb):
    if i >= 5: break
    plt.subplot(2, 5, i+1)
    img = img.swapaxes(0,1)
    img = img.swapaxes(1,2)
    plt.imshow(img)
for i,img in enumerate(test_rgb):
    if i >= 5: break
    plt.subplot(2, 5, 5+i+1)
    img = img.swapaxes(0,1)
    img = img.swapaxes(1,2)
    plt.imshow(img)
There are many ways to frame the problem of image colourization as a machine learning problem. One naive approach is to frame it as a regression problem, where we build a model to predict the RGB intensities at each pixel given the greyscale input. In this case, the outputs are continuous, and so squared error can be used to train the model.
In this section, you will get familiar with training neural networks using cloud GPUs. Run the helper code and answer the questions that follow.
Regression Architecture
class RegressionCNN(nn.Module):
    """Convolutional encoder-decoder that regresses RGB pixel values from a
    greyscale input.

    Two pooled conv stages downsample 32x32 -> 8x8, a middle conv widens the
    receptive field, and two upsampled conv stages decode back to 32x32
    before a final 3-channel convolution.
    """

    def __init__(self, kernel, num_filters):
        # first call parent's initialization function
        super().__init__()
        pad = kernel // 2

        def conv_block(c_in, c_out, pool=False, upsample=False):
            # Conv -> BatchNorm -> ReLU, optionally followed by 2x pooling
            # or 2x nearest-neighbour upsampling.
            layers = [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ]
            if pool:
                layers.append(nn.MaxPool2d(2))
            if upsample:
                layers.append(nn.Upsample(scale_factor=2))
            return nn.Sequential(*layers)

        self.downconv1 = conv_block(1, num_filters, pool=True)
        self.downconv2 = conv_block(num_filters, num_filters * 2, pool=True)
        self.rfconv = conv_block(num_filters * 2, num_filters * 2)
        self.upconv1 = conv_block(num_filters * 2, num_filters, upsample=True)
        self.upconv2 = conv_block(num_filters, 3, upsample=True)
        self.finalconv = nn.Conv2d(3, 3, kernel_size=kernel, padding=pad)

    def forward(self, x):
        """Map a (N, 1, 32, 32) greyscale batch to (N, 3, 32, 32) RGB."""
        encoded = self.downconv2(self.downconv1(x))
        encoded = self.rfconv(encoded)
        decoded = self.upconv2(self.upconv1(encoded))
        return self.finalconv(decoded)
Training code
class AttrDict(dict):
    """A dict whose keys are also readable/writable as attributes
    (args.epochs is args["epochs"])."""

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Alias the attribute dict to the mapping itself so attribute and
        # item access share one store.
        self.__dict__ = self
def get_torch_vars(xs, ys, gpu=False):
    """
    Helper function to convert numpy arrays to pytorch tensors.
    If GPU is used, move the tensors to GPU.

    Args:
        xs (float numpy tensor): greyscale input
        ys (int numpy tensor): rgb as labels
        gpu (bool): whether to move pytorch tensor to GPU
    Returns:
        Variable(xs), Variable(ys)
    """
    xs_t = torch.from_numpy(xs).float()
    ys_t = torch.from_numpy(ys).float()
    if gpu:
        xs_t, ys_t = xs_t.cuda(), ys_t.cuda()
    return Variable(xs_t), Variable(ys_t)
def train(args, gen=None):
    """Train a colourization model with MSE loss on the horse subset.

    Args:
        args: AttrDict of hyperparameters (model, kernel, num_filters,
            learn_rate, batch_size, epochs, seed, gpu, plot,
            experiment_name, downsize_input, ...).
        gen: optional pre-built model; when None, args.model is looked up in
            globals() and instantiated as Net(args.kernel, args.num_filters).

    Returns:
        The trained model.
    """
    # Numpy random seed -- makes process()'s shuffle reproducible.
    npr.seed(args.seed)
    # Save directory
    save_dir = "outputs/" + args.experiment_name
    # LOAD THE MODEL
    if gen is None:
        Net = globals()[args.model]
        gen = Net(args.kernel, args.num_filters)
    # LOSS FUNCTION
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(gen.parameters(), lr=args.learn_rate)
    # DATA
    print("Loading data...")
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    print("Transforming data...")
    train_rgb, train_grey = process(x_train, y_train, downsize_input=args.downsize_input)
    test_rgb, test_grey = process(x_test, y_test, downsize_input=args.downsize_input)
    # Create the outputs folder if not created already
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    print("Beginning training ...")
    if args.gpu:
        gen.cuda()
    start = time.time()
    train_losses = np.zeros(args.epochs)
    valid_losses = np.zeros(args.epochs)
    valid_accs = []
    for epoch in range(args.epochs):
        # Train the Model
        gen.train() # Change model to 'train' mode
        #losses = []
        total_train_loss = 0
        total_val_loss = 0
        for i, (xs, ys) in enumerate(get_batch(train_grey, train_rgb, args.batch_size)):
            images_train, labels_train = get_torch_vars(xs, ys, args.gpu)
            # Forward + Backward + Optimize
            optimizer.zero_grad()
            outputs_train = gen(images_train)
            train_loss = criterion(outputs_train, labels_train)
            train_loss.backward()
            optimizer.step()
            total_train_loss += train_loss.data.item()
        # Mean loss over the epoch; `i` is the index of the last batch.
        train_losses[epoch] = float(total_train_loss) / (i+1)
        print(epoch,"Train Losses:", train_losses[epoch])
        # NOTE(review): the "validation" set here is the CIFAR test split,
        # and gradients are not disabled (no torch.no_grad()) during this
        # pass -- confirm this is intended for the assignment.
        gen.eval()
        for i, (xs, ys) in enumerate(get_batch(test_grey, test_rgb, args.batch_size)):
            images_val, labels_val = get_torch_vars(xs, ys, args.gpu)
            outputs_val = gen(images_val)
            val_loss = criterion(outputs_val, labels_val)
            total_val_loss += val_loss.data.item()
        valid_losses[epoch] = float(total_val_loss) / (i+1)
        print(epoch, "Validation Losses:", valid_losses[epoch])
    # After training: show samples from the last batches seen in the loop.
    print("Train Images Results")
    if args.plot:
        visual(images_train, labels_train, outputs_train, args.gpu, 1)
    print("Final Validation Images Results")
    if args.plot:
        visual(images_val, labels_val, outputs_val, args.gpu, 1)
    # Loss curves; after the loop epoch+1 == args.epochs.
    plt.subplots(figsize=(6, 4))
    plt.plot(range(epoch+1), train_losses, color="blue", label="Training Set")
    plt.plot(range(epoch+1), valid_losses, color="red", label="Validation Set")
    plt.legend()
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss")
    plt.show()
    return gen
Training visualization code
# visualize 5 train/test images
def visual(img_grey, img_real, img_fake, gpu = 0, flag_torch = 0):
    """Show a 3x5 grid: greyscale inputs (row 1), ground-truth colour
    images (row 2), and model outputs (row 3) for the first 5 samples.

    Args:
        img_grey: greyscale inputs, (N, 1, H, W).
        img_real: ground-truth colour images.
        img_fake: model outputs.
        gpu: when truthy, tensors are moved off the GPU first.
        flag_torch: 0 = inputs are numpy; 1 = torch tensors; 2 = torch
            tensors with an extra trailing dimension (see below).
    """
    if gpu:
        img_grey = img_grey.cpu().detach()
        img_real = img_real.cpu().detach()
        #print(img_real.shape)
        img_fake = img_fake.cpu().detach()
    if flag_torch:
        img_grey = img_grey.numpy()
        img_real = img_real.numpy()
        img_fake = img_fake.numpy()
    if flag_torch == 2:
        # NOTE(review): this path assumes 5-D inputs (presumably with a
        # trailing singleton/class axis) and folds them back to NCHW --
        # confirm the expected shape against the caller that uses flag 2.
        img_real = np.transpose(img_real[:, :, :, :, :], [0, 4, 2, 3, 1]).squeeze()
        img_fake = np.transpose(img_fake[:, :, :, :, :], [0, 4, 2, 3, 1]).squeeze()
    #correct image structure
    # NCHW -> NHWC for imshow; squeeze drops the grey channel axis.
    img_grey = np.transpose(img_grey[:5, :, :, :], [0, 2, 3, 1]).squeeze()
    img_fake = np.transpose(img_fake[:5, :, :, :], [0, 2, 3, 1])
    img_real = np.transpose(img_real[:5, :, :, :], [0, 2, 3, 1])
    for i in range(5):
        ax = plt.subplot(3, 5, i + 1)
        ax.imshow(img_grey[i], cmap='gray')
        ax.axis("off")
        ax = plt.subplot(3, 5, i + 1 + 5)
        ax.imshow(img_real[i])
        ax.axis("off")
        ax = plt.subplot(3, 5, i + 1 + 10)
        ax.imshow(img_fake[i])
        ax.axis("off")
    plt.show()
Main training loop for regression CNN
#Main training loop for CNN
# Hyperparameters for the regression baseline. `colours` points at the
# k-means palette used by the classification variant; it is unused here.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "RegressionCNN",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 100,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
Describe the model RegressionCNN. How many convolution layers does it have? What are the filter sizes and number of filters at each layer? Construct a table or draw a diagram.
# Print the architecture and a per-layer parameter summary for a 1x32x32
# greyscale input (answers the "describe the model" question).
from torchsummary import summary
model = RegressionCNN(3,32)
print(model)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
summary(model,(1,32,32))
Answer:
Run the regression training code (should run without errors). This will generate some images. How many epochs are we training the CNN model in the given setting?
#Main training loop for CNN
# Identical configuration to the earlier training cell; re-run here to
# answer the epoch-count question (25 epochs).
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "RegressionCNN",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 100,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
Answer: We are training for a total of 25 epochs in the given setting.
Re-train a couple of new models using a different number of training epochs. You may train each new model in a new code cell by copying and modifying the code from the last notebook cell. Comment on how the results (output images, training loss) change as we increase or decrease the number of epochs.
#Main training loop for CNN
# Epoch-sweep experiment 1: same configuration, 30 epochs.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "RegressionCNN",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 100,
    "epochs": 30,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
#Main training loop for CNN
# Epoch-sweep experiment 2: same configuration, 50 epochs.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "RegressionCNN",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 100,
    "epochs": 50,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
Answer:
A skip connection in a neural network is a connection which skips one or more layer and connects to a later layer. We will introduce skip connections.
Add a skip connection from the first layer to the last, second layer to the second last, etc. That is, the final convolution should have both the output of the previous layer and the initial greyscale input as input. This type of skip-connection is introduced by [3], and is called a "UNet". Following the CNN class that you have completed, complete the init and forward methods of the UNet class. Hint: You will need to use the function torch.cat.
#complete the code
def crop_tensor(target_tensor, tensor):
    """Center-crop `tensor`'s spatial dims to match `target_tensor`'s
    spatial size (assumes square NCHW tensors).

    Note: like the original, an odd size difference leaves the crop one
    pixel larger than the target because the margin is floor-divided.
    """
    want = target_tensor.size()[2]
    have = tensor.size()[2]
    margin = (have - want) // 2
    return tensor[:, :, margin:have - margin, margin:have - margin]
class UNet(nn.Module):
    """UNet-style colourization network with skip connections.

    Encoder (conv + maxpool) downsamples 32x32 -> 8x8; transposed convs
    upsample back, concatenating encoder features (and finally the raw
    input) with decoder features before each decoding conv.

    Args:
        kernel: convolution kernel size (odd, so kernel // 2 padding keeps
            spatial size at stride 1).
        num_filters: base number of filters.
        num_colours: number of output channels (3 for RGB).
        num_in_channels: number of input channels (1 for greyscale).
    """

    def __init__(self, kernel, num_filters, num_colours=3, num_in_channels=1):
        super().__init__()
        # Useful parameters
        stride = 2
        padding = kernel // 2
        output_padding = 1
        self.downconv1 = nn.Sequential(
            nn.Conv2d(num_in_channels, num_filters, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(),
        )
        self.downconv2 = nn.Sequential(
            nn.Conv2d(num_filters, num_filters * 2, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(num_filters * 2),
            nn.ReLU(),
        )
        self.rfconv = nn.Sequential(
            nn.Conv2d(num_filters * 2, num_filters * 2, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(num_filters * 2),
            nn.ReLU(),
        )
        # Transposed convs double the spatial size (stride 2 + output_padding 1).
        self.conv_T1 = nn.ConvTranspose2d(num_filters * 2, num_filters, kernel_size=kernel,
                                          stride=stride, padding=padding, output_padding=output_padding)
        self.conv_T2 = nn.ConvTranspose2d(num_filters, num_filters, kernel_size=kernel,
                                          stride=stride, padding=padding, output_padding=output_padding)
        # Takes the upsampled features concatenated with the skip from downconv1.
        self.upconv1 = nn.Sequential(
            nn.Conv2d(num_filters * 2, num_filters, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(num_filters),
            nn.ReLU(),
        )
        # Fix: output channels were hard-coded to 3, ignoring num_colours.
        self.upconv2 = nn.Sequential(
            nn.Conv2d(num_filters, num_colours, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(num_colours),
            nn.ReLU(),
        )
        # Fix: input channels were hard-coded to 4 (= 3 colour + 1 grey);
        # generalized to num_colours + num_in_channels for the final skip.
        self.finalconv = nn.ConvTranspose2d(num_colours + num_in_channels, num_colours,
                                            kernel_size=kernel, stride=1, padding=padding)
        self.maxpool = nn.MaxPool2d(2)

    def forward(self, x):
        """Map (N, num_in_channels, 32, 32) to (N, num_colours, 32, 32)."""
        out1 = self.downconv1(x)        # 32x32, skip source
        out2 = self.maxpool(out1)       # 16x16, skip source
        out3 = self.downconv2(out2)
        out4 = self.maxpool(out3)       # 8x8
        out5 = self.rfconv(out4)
        out = self.conv_T1(out5)        # 16x16
        out = self.upconv1(torch.cat([out, out2], 1))
        out = self.conv_T2(out)         # 32x32
        out = self.upconv2(out)
        # Final skip: concatenate the raw input with the decoded features.
        out = self.finalconv(torch.cat([out, x], 1))
        return out
# Instantiate the UNet and print a per-layer summary for a 1x32x32 input.
model = UNet(3,32)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
summary(model,(1,32,32))
Train the "UNet" model for the same amount of epochs as the previous CNN and plot the training curve using a batch size of 100. How does the result compare to the previous model? Did skip connections improve the validation loss and accuracy? Did the skip connections improve the output qualitatively? How? Give at least two reasons why skip connections might improve the performance of our CNN models.
# Main training loop for UNet
# Same hyperparameters as the RegressionCNN run (25 epochs, batch 100) so
# the two training curves are directly comparable.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "UNet",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 100,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
Compared to the previous model, UNet performs better, as we can see from the results. UNet converges in fewer epochs than the RegressionCNN model. The skip connections decreased the validation loss and also improved the output quality.
Re-train a few more "UNet" models using different mini batch sizes with a fixed number of epochs. Describe the effect of batch sizes on the training/validation loss, and the final image output.
# complete the code
# Main training loop for UNet
# Batch-size experiment 1: batch 256, 25 epochs.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "UNet",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 256,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
# Main training loop for UNet
# Batch-size experiment 2: batch 32, 25 epochs.
args = AttrDict()
args_dict = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "UNet",
    "kernel": 3,
    "num_filters": 32,
    'learn_rate':0.001,
    "batch_size": 32,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(args_dict)
cnn = train(args)
# NOTE(review): this pickles `model` (the UNet built earlier just for the
# summary, never trained) rather than `cnn` returned by train() -- confirm
# which network was meant to be saved.
filename = 'final_model.sav'
pickle.dump(model, open(filename, 'wb'))
If we increase the batch size too much, it hurts the model's performance, as we can see for batch size 256. Batch size 32 trains properly and converges quickly, so we can say that larger batch sizes take longer to optimize our model.
In this second half of the assignment we will construct a conditional generative adversarial network for our image colourization task.
To start we will be modifying the previous sample code to construct and train a conditional GAN. We will exploring the different architectures to identify and select our best image colourization model.
Note: This second half of the assignment should be started after the lecture on generative adversarial networks (GANs).
Modify the provided training code to implement a generator. Then test to verify it works on the desired input (Hint: you can reuse some of your earlier autoencoder models here to act as a generator)
class Generator(nn.Module):
    """Conditional GAN generator: maps (noise map, greyscale image) to a
    colour image.

    Two strided-conv encoders process the single-channel noise map and the
    conditioning greyscale image separately (32x32 -> 8x8); their feature
    maps are concatenated and decoded back to a num_colours-channel 32x32
    image.

    Args:
        kernel: convolution kernel size (odd, so kernel // 2 padding keeps
            spatial size at stride 1).
        num_filters: base number of filters.
        num_colours: number of output channels (3 for RGB).
        num_in_channels: channels of the conditioning (greyscale) input.
    """

    def __init__(self, kernel, num_filters, num_colours=3, num_in_channels=1):
        super().__init__()
        # Useful parameters
        stride = 2
        padding = kernel // 2
        output_padding = 1
        filter_no = num_filters
        # Noise encoder, stage 1: 32x32 -> 16x16 (noise is always 1 channel).
        self.upconv_input = nn.Sequential(
            nn.Conv2d(1, filter_no * 2, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Conditioning-image encoder, stage 1. Fix: input channels were
        # hard-coded to 1, ignoring num_in_channels.
        self.upconv_label = nn.Sequential(
            nn.Conv2d(num_in_channels, filter_no * 2, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Stage 2 for both paths: 16x16 -> 8x8.
        self.upconv_input1 = nn.Sequential(
            nn.Conv2d(filter_no * 2, filter_no * 4, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.upconv_label1 = nn.Sequential(
            nn.Conv2d(filter_no * 2, filter_no * 4, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Decoder on the concatenated (8*filter_no channel) features: 8x8 -> 16x16.
        self.upconv1 = nn.Sequential(
            nn.ConvTranspose2d(filter_no * 8, filter_no * 8, kernel_size=kernel,
                               stride=stride, padding=padding, output_padding=output_padding),
            nn.BatchNorm2d(filter_no * 8),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.upconv2 = nn.Sequential(
            nn.ConvTranspose2d(filter_no * 8, filter_no * 4, kernel_size=kernel, stride=1, padding=padding),
            nn.BatchNorm2d(filter_no * 4),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # 16x16 -> 32x32. Fix: output channels were hard-coded to 3,
        # ignoring num_colours.
        self.upconv3 = nn.Sequential(
            nn.Upsample(scale_factor=2),
            nn.ConvTranspose2d(filter_no * 4, num_colours, kernel_size=kernel, stride=1, padding=padding),
            nn.BatchNorm2d(num_colours),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.finalconv = nn.Sequential(
            nn.Conv2d(num_colours, num_colours, kernel_size=kernel, stride=1, padding=padding),
        )

    def forward(self, input, labels):
        """Generate a colour image from `input` noise conditioned on the
        greyscale image `labels`; both are (N, C, 32, 32)."""
        x = self.upconv_input(input)
        y = self.upconv_label(labels)
        x = self.upconv_input1(x)
        y = self.upconv_label1(y)
        # Fuse the two encodings along the channel axis.
        out = torch.cat([x, y], 1)
        out = self.upconv1(out)
        out = self.upconv2(out)
        out = self.upconv3(out)
        out = self.finalconv(out)
        return out
#test generator architecture
# summary() receives a list of two input shapes because Generator.forward
# takes (noise, greyscale) as separate tensors.
model_gen = Generator(3,32)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_gen.to(device)
summary(model_gen,[(1,32,32),(1,32,32)])
Modify the provided training code to implement a discriminator. Then test to verify it works on the desired input.
# Sample a uniform noise map shaped like one greyscale image and display it
# (bare expression -> notebook cell output).
noise = torch.rand(1,1,32,32)
noise
# discriminator code
class Discriminator(nn.Module):
    """Conditional GAN discriminator: scores a (colour image, greyscale
    image) pair as real (sigmoid output in (0, 1)).

    Both inputs are encoded separately with strided convs, fused by channel
    concatenation, decoded back to a 1-channel 32x32 map, then classified
    by a small MLP head.

    Args:
        kernel: convolution kernel size (odd, so kernel // 2 padding keeps
            spatial size at stride 1).
        num_filters: base number of filters.
        num_colours: channels of the colour image (3 for RGB).
        num_in_channels: channels of the conditioning (greyscale) input.

    NOTE(review): the flatten/linear head assumes 32x32 inputs; other
    spatial sizes will fail at the view() -- confirm callers only pass 32x32.
    """

    def __init__(self, kernel, num_filters, num_colours=3, num_in_channels=1):
        super().__init__()
        # Useful parameters
        stride = 2
        padding = kernel // 2
        output_padding = 1
        filter_no = num_filters
        # Colour-image encoder: 32x32 -> 16x16. Fix: input channels were
        # hard-coded to 3, ignoring num_colours.
        self.upconv_input = nn.Sequential(
            nn.Conv2d(num_colours, filter_no * 2, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Conditioning (greyscale) encoder. Fix: input channels were
        # hard-coded to 1, ignoring num_in_channels.
        self.upconv_label = nn.Sequential(
            nn.Conv2d(num_in_channels, filter_no * 2, kernel_size=kernel, stride=stride, padding=padding),
            nn.BatchNorm2d(filter_no * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # Operates on the concatenated (4*filter_no channel) features.
        self.downconv1 = nn.Sequential(
            nn.Conv2d(filter_no * 4, filter_no * 2, kernel_size=kernel, padding=padding),
            nn.BatchNorm2d(filter_no * 2),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.downconv2 = nn.Sequential(
            nn.ConvTranspose2d(filter_no * 2, filter_no, kernel_size=kernel, stride=1, padding=padding),
            nn.BatchNorm2d(filter_no),
            nn.LeakyReLU(0.2, inplace=True),
        )
        # 16x16 -> 32x32, collapsing to a single channel for the MLP head.
        self.downconv3 = nn.Sequential(
            nn.ConvTranspose2d(filter_no, 1, kernel_size=kernel, stride=stride,
                               padding=padding, output_padding=output_padding),
            nn.BatchNorm2d(1),
            nn.LeakyReLU(0.2, inplace=True),
        )
        self.linear = nn.Sequential(
            nn.Linear(32 * 32 * 1, 256),
            nn.Dropout(0.5),
            nn.Linear(256, 64),
            nn.Dropout(0.5),
            nn.Linear(64, 1),
        )

    def forward(self, x, img_greyscale):
        """Score colour images `x` conditioned on `img_greyscale`.

        Returns a (N, 1) tensor of sigmoid probabilities.
        """
        x = self.upconv_input(x)
        y = self.upconv_label(img_greyscale)
        out = torch.cat([x, y], 1)
        out = self.downconv1(out)
        out = self.downconv2(out)
        out = self.downconv3(out)
        out = out.view(-1, 1 * 32 * 32)
        out = self.linear(out)
        out = torch.sigmoid(out)
        return out
# test discriminator architecture
# Two input shapes: the colour image and its greyscale conditioning image.
model_dis = Discriminator(3,32)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_dis.to(device)
summary(model_dis,[(3,32,32),(1,32,32)])
Modify the provided training code to implement a conditional GAN.
class AttrDict(dict):
    """Dict with attribute-style access (duplicate of the earlier helper,
    redefined for the cGAN half of the notebook)."""

    def __init__(self, *args, **kwargs):
        dict.__init__(self, *args, **kwargs)
        # Point the instance's attribute store at the mapping itself so
        # d.key and d["key"] stay in sync.
        self.__dict__ = self
def get_torch_vars(xs, ys, gpu=False):
    """
    Helper function to convert numpy arrays to pytorch tensors.
    If GPU is used, move the tensors to GPU.

    Args:
        xs (float numpy tensor): greyscale input
        ys (float numpy tensor): colour targets (converted to float for the cGAN)
        gpu (bool): whether to move pytorch tensor to GPU
    Returns:
        Variable(xs), Variable(ys)
    """
    tensors = [torch.from_numpy(arr).float() for arr in (xs, ys)]
    if gpu:
        tensors = [t.cuda() for t in tensors]
    return Variable(tensors[0]), Variable(tensors[1])
def train(args, cnn=None):
    """Train a conditional GAN (generator + discriminator) for colourization.

    Args:
        args: AttrDict of hyperparameters (gpu, model, kernel, num_filters,
            learn_rate, batch_size, epochs, seed, plot, experiment_name,
            downsize_input, ...).
        cnn: optional pre-built generator network; a new one is created from
            ``args.model`` when None.

    Returns:
        The trained generator network.
    """
    # Set the maximum number of threads to prevent crash in Teaching Labs
    torch.set_num_threads(5)
    # Numpy random seed for reproducible batching
    npr.seed(args.seed)
    # Save directory
    save_dir = "outputs/" + args.experiment_name

    # INPUT CHANNEL: greyscale (1 channel) unless the input is downsized RGB (3)
    num_in_channels = 1 if not args.downsize_input else 3

    # LOAD THE MODELS.  The discriminator must exist even when a generator is
    # passed in, so it is built unconditionally (the original only created it
    # inside the `if cnn is None` branch, which raised NameError otherwise).
    if cnn is None:
        Net = globals()[args.model]  # model class selected by name, e.g. Generator
        cnn = Net(args.kernel, args.num_filters)
    discriminator = Discriminator(args.kernel, args.num_filters)

    # LOSS FUNCTION and one optimizer per network
    criterion = nn.BCELoss()
    g_optimizer = torch.optim.Adam(cnn.parameters(), args.learn_rate)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), args.learn_rate)

    # DATA
    print("Loading data...")
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    print("Transforming data...")
    train_rgb, train_grey = process(x_train, y_train, downsize_input=args.downsize_input)
    test_rgb, test_grey = process(x_test, y_test, downsize_input=args.downsize_input)

    # Create the outputs folder if not created already
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Beginning training ...")
    if args.gpu:
        cnn.cuda()
        discriminator.cuda()
    start = time.time()

    g_train_losses = np.zeros(args.epochs)
    g_valid_losses = np.zeros(args.epochs)
    d_train_losses = np.zeros(args.epochs)
    d_valid_losses = np.zeros(args.epochs)

    for epoch in range(args.epochs):
        # ---------------- Training ----------------
        cnn.train()
        discriminator.train()
        g_total_train_loss = 0
        d_total_train_loss = 0
        g_total_val_loss = 0
        d_total_val_loss = 0
        for i, (xs, ys) in enumerate(get_batch(train_grey, train_rgb, args.batch_size)):
            images_train, labels_train = get_torch_vars(xs, ys, args.gpu)
            img_grey = images_train
            img_real = labels_train
            # Use the actual batch size: the last batch may be smaller than
            # args.batch_size, which previously caused a BCE size mismatch.
            batch_size = img_grey.shape[0]
            # Create labels/noise on the same device as the data, so the code
            # also runs when args.gpu is False.
            device = img_grey.device

            # -- Discriminator step --
            d_optimizer.zero_grad()
            # Real images.  NOTE: this code uses a *flipped* label convention
            # (real = 0, fake = 1) consistently for both D and G.
            D_real = discriminator(img_real, img_grey)
            labels = torch.zeros(batch_size, device=device)
            d_real_loss = criterion(D_real.squeeze(), labels)
            # Fake images from noise conditioned on the greyscale input
            noise = torch.randn(batch_size, 1, 32, 32, device=device)
            fake_images_train = cnn(noise, img_grey)
            D_fake = discriminator(fake_images_train, img_grey)
            labels = torch.ones(batch_size, device=device)  # fake labels = 1
            d_fake_loss = criterion(D_fake.squeeze(), labels)
            # Add up losses and update parameters
            d_loss = d_real_loss + d_fake_loss
            d_total_train_loss += d_loss.item()
            d_loss.backward()
            d_optimizer.step()

            # -- Generator step --
            g_optimizer.zero_grad()
            noise = torch.randn(batch_size, 1, 32, 32, device=device)
            fake_images = cnn(noise, img_grey)
            D_fake = discriminator(fake_images, img_grey)
            # The generator wants D to emit the "real" label (0 in this convention)
            labels = torch.zeros(batch_size, device=device)
            g_loss = criterion(D_fake.squeeze(), labels)
            g_total_train_loss += g_loss.item()
            g_loss.backward()
            g_optimizer.step()

        g_train_losses[epoch] = float(g_total_train_loss) / (i + 1)
        d_train_losses[epoch] = float(d_total_train_loss) / (i + 1)
        print(epoch, "Generator Train Losses:", g_train_losses[epoch])
        print(epoch, "Discriminator Train Losses:", d_train_losses[epoch])

        # ---------------- Validation (no gradients, both nets in eval mode) ----------------
        cnn.eval()
        discriminator.eval()
        with torch.no_grad():
            for i, (xs, ys) in enumerate(get_batch(test_grey, test_rgb, args.batch_size)):
                images_val, labels_val = get_torch_vars(xs, ys, args.gpu)
                img_grey = images_val
                img_real = labels_val
                batch_size = img_grey.shape[0]
                device = img_grey.device
                # Discriminator losses (same flipped-label convention as training)
                D_real = discriminator(img_real, img_grey)
                labels = torch.zeros(batch_size, device=device)
                d_real_loss = criterion(D_real.squeeze(), labels)
                noise = torch.randn(batch_size, 1, 32, 32, device=device)
                fake_images_val = cnn(noise, img_grey)
                D_fake = discriminator(fake_images_val, img_grey)
                labels = torch.ones(batch_size, device=device)  # fake labels = 1
                d_fake_loss = criterion(D_fake.squeeze(), labels)
                d_total_val_loss += (d_real_loss + d_fake_loss).item()
                # Generator loss on freshly sampled noise
                noise = torch.randn(batch_size, 1, 32, 32, device=device)
                fake_images = cnn(noise, img_grey)
                D_fake = discriminator(fake_images, img_grey)
                labels = torch.zeros(batch_size, device=device)
                g_loss = criterion(D_fake.squeeze(), labels)
                g_total_val_loss += g_loss.item()

        g_valid_losses[epoch] = float(g_total_val_loss) / (i + 1)
        d_valid_losses[epoch] = float(d_total_val_loss) / (i + 1)
        print(epoch, "Generator Validation Losses:", g_valid_losses[epoch])
        print(epoch, "Discriminator Validation Losses:", d_valid_losses[epoch])

    print("Train Images Results")
    if args.plot:
        visual(images_train, labels_train, fake_images_train, args.gpu, 1)
    print("Final Validation Images Results")
    if args.plot:
        visual(images_val, labels_val, fake_images_val, args.gpu, 1)

    # Generator loss curves
    plt.subplots(figsize=(6, 4))
    plt.plot(range(args.epochs), g_train_losses, color="blue", label="Training Set")
    plt.plot(range(args.epochs), g_valid_losses, color="red", label="Validation Set")
    plt.legend()
    plt.title("Generator Plots")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss")
    plt.show()
    # Discriminator loss curves
    plt.subplots(figsize=(6, 4))
    plt.plot(range(args.epochs), d_train_losses, color="blue", label="Training Set")
    plt.plot(range(args.epochs), d_valid_losses, color="red", label="Validation Set")
    plt.legend()
    plt.title("Discriminator Plots")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss")
    plt.show()
    return cnn
Train a conditional GAN for image colourization.
# Hyperparameters for the baseline cGAN colourization run.
args = AttrDict()
hyperparams = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "Generator",
    "kernel": 3,
    "num_filters": 32,
    "learn_rate": 0.0001,
    "batch_size": 50,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
}
args.update(hyperparams)
cnn = train(args)
# A batch size of 50 with 100 epochs seemed to work well.
How does the performance of the cGAN compare with the autoencoder models that you tested in the first half of this assignment?
Answer: The autoencoder was easy to train and tune, whereas the cGAN was much harder to train, and the results we achieved with the cGAN are somewhat poorer than the autoencoder's, since we are generating images from random noise. However, with a sufficiently expressive generator and discriminator, the cGAN approach can achieve better results.
A colour space is a choice of mapping of colours into three-dimensional coordinates. Some colours could be close together in one colour space, but further apart in others. The RGB colour space is probably the most familiar to you; the model used in our regression colourization example computes squared error in RGB colour space. However, most state-of-the-art colourization models do not use the RGB colour space. How could using the RGB colour space be problematic? Your answer should relate how human perception of colour differs from the squared distance. You may use the Wikipedia article on colour space to help you answer the question.
Answer:
The RGB colour space is not perceptually uniform: equal Euclidean (squared) distances in RGB coordinates do not correspond to equally large perceived colour differences. Two colours a fixed RGB distance apart may look nearly identical in one region of the space yet clearly different in another, so a squared-error loss in RGB penalizes errors in a way that does not match human perception. This is why perceptually motivated spaces such as CIELAB are preferred for colourization.
RGB is also a device-dependent colour model, and can only really be used reliably on screens and, in some cases, conventional photography.
That is, it describes the colours you see on a display right now, and it does not translate the same way through printing.
In most printing processes, the RGB colour space is converted to CMYK before the artwork is produced, so colours produced with RGB in digital space will not yield the same output on a physical print.
This is a consequence of the colour gamut: translating RGB to CMYK only approximates the original appearance on the display screen, which is why print work is done in a CMYK colour space to match the final physical result as closely as possible.
ref: https://pakfactory.com/blog/rgb-color-model-printing-packaging/
At this point we have trained a few different generative models for our image colourization task with varying results. What makes this work exciting is that there many other approaches we could take. In this part of the assignment you will be exploring at least one of several approaches towards improving our performance on the image colourization task. Some well known approaches you can consider include:
Other interesting approaches include:
A great example of some of these different approaches can be found in a blog post by Moein Shariatnia.
Note you are only required to pick one of the suggested modifications.
We will implement an L1 (pixel-wise) loss in addition to the discriminator-based adversarial loss.
# Modified training loop: adversarial loss + weighted L1 pixel loss.
def train_modified(args, cnn=None):
    """Train a cGAN whose generator loss adds an L1 term to the adversarial loss.

    The generator objective is  BCE(D(G(z, grey)), real) + lambda_l1 * L1(G(z, grey), rgb),
    which encourages outputs that both fool the discriminator and stay close
    to the ground-truth colours pixel-wise.

    Args:
        args: AttrDict of hyperparameters; in addition to the fields used by
            ``train`` it must provide ``lambda_l1``, the L1 weight.
        cnn: optional pre-built generator network; built from ``args.model``
            when None.

    Returns:
        The trained generator network.
    """
    # Set the maximum number of threads to prevent crash in Teaching Labs
    torch.set_num_threads(5)
    # Numpy random seed for reproducible batching
    npr.seed(args.seed)
    # Save directory
    save_dir = "outputs/" + args.experiment_name

    # INPUT CHANNEL: greyscale (1 channel) unless the input is downsized RGB (3)
    num_in_channels = 1 if not args.downsize_input else 3

    # LOAD THE MODELS.  The discriminator is built unconditionally so the
    # function also works when a generator is passed in.
    if cnn is None:
        Net = globals()[args.model]  # model class selected by name, e.g. Generator
        cnn = Net(args.kernel, args.num_filters)
    discriminator = Discriminator(args.kernel, args.num_filters)

    # LOSS FUNCTIONS: adversarial BCE plus pixel-wise L1, and one optimizer per net
    gan_criterion = nn.BCELoss()
    l1_criterion = nn.L1Loss()
    g_optimizer = torch.optim.Adam(cnn.parameters(), args.learn_rate)
    d_optimizer = torch.optim.Adam(discriminator.parameters(), args.learn_rate)

    # DATA
    print("Loading data...")
    (x_train, y_train), (x_test, y_test) = load_cifar10()
    print("Transforming data...")
    train_rgb, train_grey = process(x_train, y_train, downsize_input=args.downsize_input)
    test_rgb, test_grey = process(x_test, y_test, downsize_input=args.downsize_input)

    # Create the outputs folder if not created already
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    print("Beginning training ...")
    if args.gpu:
        cnn.cuda()
        discriminator.cuda()
    start = time.time()

    g_train_losses = np.zeros(args.epochs)
    g_valid_losses = np.zeros(args.epochs)
    d_train_losses = np.zeros(args.epochs)
    d_valid_losses = np.zeros(args.epochs)

    for epoch in range(args.epochs):
        # ---------------- Training ----------------
        cnn.train()
        discriminator.train()
        g_total_train_loss = 0
        d_total_train_loss = 0
        g_total_val_loss = 0
        d_total_val_loss = 0
        for i, (xs, ys) in enumerate(get_batch(train_grey, train_rgb, args.batch_size)):
            images_train, labels_train = get_torch_vars(xs, ys, args.gpu)
            img_grey = images_train
            img_real = labels_train
            # Use the actual batch size so a smaller final batch does not
            # crash the BCE loss with a size mismatch.
            batch_size = img_grey.shape[0]
            # Create labels/noise on the data's device so CPU runs also work.
            device = img_grey.device

            # -- Discriminator step --
            d_optimizer.zero_grad()
            # Real images.  NOTE: flipped label convention (real = 0, fake = 1)
            # is used consistently for both D and G.
            D_real = discriminator(img_real, img_grey)
            labels = torch.zeros(batch_size, device=device)
            d_real_loss = gan_criterion(D_real.squeeze(), labels)
            # Fake images from noise conditioned on the greyscale input
            noise = torch.randn(batch_size, 1, 32, 32, device=device)
            fake_images_train = cnn(noise, img_grey)
            D_fake = discriminator(fake_images_train, img_grey)
            labels = torch.ones(batch_size, device=device)  # fake labels = 1
            d_fake_loss = gan_criterion(D_fake.squeeze(), labels)
            # Add up losses and update parameters
            d_loss = d_real_loss + d_fake_loss
            d_total_train_loss += d_loss.item()
            d_loss.backward()
            d_optimizer.step()

            # -- Generator step: adversarial + weighted L1 --
            g_optimizer.zero_grad()
            noise = torch.randn(batch_size, 1, 32, 32, device=device)
            fake_images = cnn(noise, img_grey)
            D_fake = discriminator(fake_images, img_grey)
            labels = torch.zeros(batch_size, device=device)  # "real" label in this convention
            g_loss_gan = gan_criterion(D_fake.squeeze(), labels)
            g_loss_l1 = l1_criterion(fake_images, img_real) * args.lambda_l1
            g_loss = g_loss_gan + g_loss_l1
            g_total_train_loss += g_loss.item()
            g_loss.backward()
            g_optimizer.step()

        g_train_losses[epoch] = float(g_total_train_loss) / (i + 1)
        d_train_losses[epoch] = float(d_total_train_loss) / (i + 1)
        print(epoch, "Generator Train Losses:", g_train_losses[epoch])
        print(epoch, "Discriminator Train Losses:", d_train_losses[epoch])

        # ---------------- Validation (no gradients, both nets in eval mode) ----------------
        cnn.eval()
        discriminator.eval()
        with torch.no_grad():
            for i, (xs, ys) in enumerate(get_batch(test_grey, test_rgb, args.batch_size)):
                images_val, labels_val = get_torch_vars(xs, ys, args.gpu)
                img_grey = images_val
                img_real = labels_val
                batch_size = img_grey.shape[0]
                device = img_grey.device
                # Discriminator losses (same flipped-label convention)
                D_real = discriminator(img_real, img_grey)
                labels = torch.zeros(batch_size, device=device)
                d_real_loss = gan_criterion(D_real.squeeze(), labels)
                noise = torch.randn(batch_size, 1, 32, 32, device=device)
                fake_images_val = cnn(noise, img_grey)
                D_fake = discriminator(fake_images_val, img_grey)
                labels = torch.ones(batch_size, device=device)  # fake labels = 1
                d_fake_loss = gan_criterion(D_fake.squeeze(), labels)
                d_total_val_loss += (d_real_loss + d_fake_loss).item()
                # Generator loss: adversarial + weighted L1
                noise = torch.randn(batch_size, 1, 32, 32, device=device)
                fake_images = cnn(noise, img_grey)
                D_fake = discriminator(fake_images, img_grey)
                labels = torch.zeros(batch_size, device=device)
                g_loss_gan = gan_criterion(D_fake.squeeze(), labels)
                g_loss_l1 = l1_criterion(fake_images, img_real) * args.lambda_l1
                g_total_val_loss += (g_loss_gan + g_loss_l1).item()

        g_valid_losses[epoch] = float(g_total_val_loss) / (i + 1)
        d_valid_losses[epoch] = float(d_total_val_loss) / (i + 1)
        print(epoch, "Generator Validation Losses:", g_valid_losses[epoch])
        print(epoch, "Discriminator Validation Losses:", d_valid_losses[epoch])

    print("Train Images Results")
    if args.plot:
        visual(images_train, labels_train, fake_images_train, args.gpu, 1)
    print("Final Validation Images Results")
    if args.plot:
        visual(images_val, labels_val, fake_images_val, args.gpu, 1)

    # Generator loss curves
    plt.subplots(figsize=(6, 4))
    plt.plot(range(args.epochs), g_train_losses, color="blue", label="Training Set")
    plt.plot(range(args.epochs), g_valid_losses, color="red", label="Validation Set")
    plt.legend()
    plt.title("Generator Plots")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss")
    plt.show()
    # Discriminator loss curves
    plt.subplots(figsize=(6, 4))
    plt.plot(range(args.epochs), d_train_losses, color="blue", label="Training Set")
    plt.plot(range(args.epochs), d_valid_losses, color="red", label="Validation Set")
    plt.legend()
    plt.title("Discriminator Plots")
    plt.xlabel("Number of Epochs")
    plt.ylabel("Loss")
    plt.show()
    return cnn
# cGAN + L1 run with a heavy pixel-loss weight (lambda_l1 = 200).
args = AttrDict()
hyperparams = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "Generator",
    "kernel": 3,
    "num_filters": 32,
    "learn_rate": 0.0001,
    "batch_size": 50,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
    "lambda_l1": 200,
}
args.update(hyperparams)
cnn = train_modified(args)
# Second cGAN + L1 run with a lighter pixel-loss weight (lambda_l1 = 50)
# for comparison against the lambda_l1 = 200 run above.
args = AttrDict()
hyperparams = {
    "gpu": True,
    "valid": False,
    "checkpoint": "",
    "colours": "./data/colours/colour_kmeans24_cat7.npy",
    "model": "Generator",
    "kernel": 3,
    "num_filters": 32,
    "learn_rate": 0.0001,
    "batch_size": 50,
    "epochs": 25,
    "seed": 0,
    "plot": True,
    "experiment_name": "colourization_cnn",
    "visualize": False,
    "downsize_input": False,
    "lambda_l1": 50,
}
args.update(hyperparams)
cnn = train_modified(args)
Retrieve sample pictures from online and demonstrate how well your best model performs. Provide all your code.
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import torchvision
import torch
from torchvision import datasets, transforms
from PIL import Image
!unzip "/content/test_images.zip" -d "/content/horse"
transform = transforms.Compose([transforms.ToTensor()])
transform_grey = transforms.Compose([transforms.Grayscale(),transforms.ToTensor()
])
test_set = datasets.ImageFolder('/content/horse',transform = transform)
grey_set = datasets.ImageFolder('/content/horse',transform = transform_grey)
device = torch.device('cuda')
convert_to_image = transforms.Compose([transforms.ToPILImage(), transforms.Resize((128, 128))])
#loaded_cnn = pickle.load(open(filename, 'rb'))
#criterian = nn.BCELoss()
loss = 0
cnn.eval()
discriminator = Discriminator(args.kernel,args.num_filters)
discriminator.to(device)
i = 0
fig,ax = plt.subplots(3,10,figsize = (20,10))
#fig.tight_layout()
for imgs_in_batch, labels in iter(torch.utils.data.DataLoader(grey_set,1)):
imgs_in_batch = imgs_in_batch.to(device)
noise = Variable(torch.randn(1,1,32,32)).cuda()
fake_images = cnn(noise,imgs_in_batch)
labels = torch.ones(1).cuda()
d_fake_loss = criterian(fake_images,labels)
# Displaying Images
gray_image = convert_to_image(imgs_in_batch[0])
actual_image = convert_to_image(test_set[i][0])
colored_image = convert_to_image(fake_images[0])
ax[0,i].imshow(gray_image, cmap='gray')
ax[1,i].imshow(actual_image)
ax[2,i].imshow(colored_image)
i = i+1
plt.show()
Detailed instructions for saving to HTML can be found here. Provided below are a summary of the instructions:
(1) download your ipynb file by clicking on File->Download.ipynb
(2) reupload your file to the temporary Google Colab storage (you can access the temporary storage from the tab to the left)
(3) run the following:
%%shell
jupyter nbconvert --to html LAB_3_Generating_Data.ipynb
(4) the html file will be available for download in the temporary Google Colab storage
(5) review the html file and make sure all the results are visible before submitting your assignment to Quercus